In [1]:
cd ../..
/code
In [2]:
%run "source/config/notebook_settings.py"
import os
import mlflow
from mlflow.tracking import MlflowClient
from helpsk.utility import read_pickle
import helpsk as hlp

from source.library.utilities import Timer, log_info, get_config

# Load the project configuration and read the MLflow tracking URI from its MLFLOW section.
config = get_config()
mlflow_uri = config['MLFLOW']['URI']
log_info(f"MLFlow URI: {mlflow_uri}")

# Client used by every later cell to query the tracking server and download run artifacts.
client = MlflowClient(tracking_uri=mlflow_uri)
2022-06-15 02:01:36 - INFO     | MLFlow URI: http://mlflow_server:1235

Get Latest Experiment Run from MLflow

In [3]:
# Get the production model version and actual model.
# The registry is asked for the latest version(s) in the 'Production' stage of the configured model.
production_versions = client.get_latest_versions(
    name=config['MLFLOW']['MODEL_NAME'],
    stages=['Production'],
)
# Exactly one Production version is expected; fail with a readable message otherwise.
assert len(production_versions) == 1, (
    f"Expected exactly 1 Production model version, found {len(production_versions)}"
)
production_model_info = production_versions[0]

# Download the model artifact logged by the run that produced this version and load it.
# NOTE(review): presumably `read_pickle` unpickles the file, which can execute arbitrary code --
# only load artifacts from a trusted tracking server.
production_model = read_pickle(client.download_artifacts(
    run_id=production_model_info.run_id,
    path='model/model.pkl'
))
log_info(f"Production Model Version: {production_model_info.version}")
2022-06-15 02:01:36 - INFO     | Production Model Version: 2
In [4]:
# Get the experiment and the info of its most recently started run.
experiment_name = config['MLFLOW']['EXPERIMENT_NAME']
credit_experiment = client.get_experiment_by_name(name=experiment_name)
if credit_experiment is None:
    # Without this guard the next line fails with an opaque AttributeError on None.
    raise ValueError(f"MLflow experiment not found: {experiment_name!r}")

# NOTE(review): `list_run_infos` appears to have been removed in MLflow 2.x in favour of
# `search_runs` -- confirm before upgrading MLflow.
runs = client.list_run_infos(experiment_id=credit_experiment.experiment_id)
if not runs:
    raise ValueError(f"MLflow experiment {experiment_name!r} has no runs")

# Pick the run with the largest start_time; on ties the first such run in `runs` is kept.
latest_run = max(runs, key=lambda run_info: run_info.start_time)
In [5]:
# Download the latest run's experiment.yaml artifact and load it into an
# MLExperimentResults object, which the cross-validation sections below query and plot.
yaml_path = client.download_artifacts(
    run_id=latest_run.run_id,
    path='experiment.yaml',
)
results = hlp.sklearn_eval.MLExperimentResults.from_yaml_file(yaml_file_name=yaml_path)
In [6]:
# Get the best estimator from the BayesSearchCV: download the model artifact
# logged by the latest run, then load it from the local copy.
best_estimator_path = client.download_artifacts(run_id=latest_run.run_id, path='model/model.pkl')
best_estimator = read_pickle(best_estimator_path)
In [7]:
# Display the wrapped model (the output below shows a scikit-learn Pipeline: preprocessing + classifier).
best_estimator.model
Out[7]:
Pipeline(steps=[('prep',
                 ColumnTransformer(transformers=[('numeric',
                                                  Pipeline(steps=[('imputer',
                                                                   TransformerChooser(transformer=SimpleImputer(strategy='median'))),
                                                                  ('scaler',
                                                                   TransformerChooser()),
                                                                  ('pca',
                                                                   TransformerChooser(transformer=PCA(n_components='mle')))]),
                                                  ['duration', 'credit_amount',
                                                   'installment_commitment',
                                                   'residence_since', 'age',
                                                   'existing_credi...
                                                   'employment',
                                                   'personal_status',
                                                   'other_parties',
                                                   'property_magnitude',
                                                   'other_payment_plans',
                                                   'housing', 'job',
                                                   'own_telephone',
                                                   'foreign_worker'])])),
                ('model',
                 RandomForestClassifier(criterion='entropy', max_depth=99,
                                        max_features=0.031837350792579364,
                                        max_samples=0.9248344222191298,
                                        min_samples_leaf=4,
                                        min_samples_split=16, n_estimators=1235,
                                        random_state=42))])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
Pipeline(steps=[('prep',
                 ColumnTransformer(transformers=[('numeric',
                                                  Pipeline(steps=[('imputer',
                                                                   TransformerChooser(transformer=SimpleImputer(strategy='median'))),
                                                                  ('scaler',
                                                                   TransformerChooser()),
                                                                  ('pca',
                                                                   TransformerChooser(transformer=PCA(n_components='mle')))]),
                                                  ['duration', 'credit_amount',
                                                   'installment_commitment',
                                                   'residence_since', 'age',
                                                   'existing_credi...
                                                   'employment',
                                                   'personal_status',
                                                   'other_parties',
                                                   'property_magnitude',
                                                   'other_payment_plans',
                                                   'housing', 'job',
                                                   'own_telephone',
                                                   'foreign_worker'])])),
                ('model',
                 RandomForestClassifier(criterion='entropy', max_depth=99,
                                        max_features=0.031837350792579364,
                                        max_samples=0.9248344222191298,
                                        min_samples_leaf=4,
                                        min_samples_split=16, n_estimators=1235,
                                        random_state=42))])
ColumnTransformer(transformers=[('numeric',
                                 Pipeline(steps=[('imputer',
                                                  TransformerChooser(transformer=SimpleImputer(strategy='median'))),
                                                 ('scaler',
                                                  TransformerChooser()),
                                                 ('pca',
                                                  TransformerChooser(transformer=PCA(n_components='mle')))]),
                                 ['duration', 'credit_amount',
                                  'installment_commitment', 'residence_since',
                                  'age', 'existing_credits',
                                  'num_dependents']),
                                ('non_numeric',
                                 Pipeline(steps=[('encoder',
                                                  TransformerChooser(transformer=CustomOrdinalEncoder()))]),
                                 ['checking_status', 'credit_history',
                                  'purpose', 'savings_status', 'employment',
                                  'personal_status', 'other_parties',
                                  'property_magnitude', 'other_payment_plans',
                                  'housing', 'job', 'own_telephone',
                                  'foreign_worker'])])
['duration', 'credit_amount', 'installment_commitment', 'residence_since', 'age', 'existing_credits', 'num_dependents']
TransformerChooser(transformer=SimpleImputer(strategy='median'))
SimpleImputer(strategy='median')
SimpleImputer(strategy='median')
TransformerChooser()
TransformerChooser(transformer=PCA(n_components='mle'))
PCA(n_components='mle')
PCA(n_components='mle')
['checking_status', 'credit_history', 'purpose', 'savings_status', 'employment', 'personal_status', 'other_parties', 'property_magnitude', 'other_payment_plans', 'housing', 'job', 'own_telephone', 'foreign_worker']
TransformerChooser(transformer=CustomOrdinalEncoder())
CustomOrdinalEncoder()
CustomOrdinalEncoder()
RandomForestClassifier(criterion='entropy', max_depth=99,
                       max_features=0.031837350792579364,
                       max_samples=0.9248344222191298, min_samples_leaf=4,
                       min_samples_split=16, n_estimators=1235,
                       random_state=42)

Training & Test Data Info

In [8]:
# Show the local path that download_artifacts returns for the x_train.pkl artifact (inspection only).
client.download_artifacts(run_id=latest_run.run_id, path='x_train.pkl')
Out[8]:
'/code/mlflow-artifact-root/1/9f1d6c430713465e91d46d56afc18e11/artifacts/x_train.pkl'
In [9]:
# Load the train/test splits that the latest run logged as pickled artifacts.
# Uses the public `pd.read_pickle` entry point rather than reaching through `pd.pandas`.
# NOTE(review): unpickling can execute arbitrary code -- only load artifacts from a trusted server.
with Timer("Loading training/test datasets"):
    X_train, X_test, y_train, y_test = (
        pd.read_pickle(client.download_artifacts(run_id=latest_run.run_id, path=f'{artifact_name}.pkl'))
        for artifact_name in ('x_train', 'x_test', 'y_train', 'y_test')
    )
2022-06-15 02:01:37 - INFO     | *****Timer Started: Loading training/test datasets
2022-06-15 02:01:37 - INFO     | *****Timer Finished (0.04 seconds)
In [10]:
# Log the feature-matrix shape and the number of labels for the training split, then the test split.
for features, labels in ((X_train, y_train), (X_test, y_test)):
    log_info(features.shape)
    log_info(len(labels))
2022-06-15 02:01:37 - INFO     | (800, 20)
2022-06-15 02:01:37 - INFO     | 800
2022-06-15 02:01:37 - INFO     | (200, 20)
2022-06-15 02:01:37 - INFO     | 200
In [11]:
# Distinct label values in the training target and how many times each occurs.
np.unique(y_train, return_counts=True)
Out[11]:
(array([0, 1]), array([559, 241]))
In [12]:
# Share of each label in the training target (per-label counts divided by the total count).
train_label_counts = np.unique(y_train, return_counts=True)[1]
train_label_counts / np.sum(train_label_counts)
Out[12]:
array([0.69875, 0.30125])
In [13]:
# Share of each label in the test target (per-label counts divided by the total count).
test_label_counts = np.unique(y_test, return_counts=True)[1]
test_label_counts / np.sum(test_label_counts)
Out[13]:
array([0.705, 0.295])

Cross Validation Results

Best Scores/Params

In [14]:
# Log the best score recorded in the experiment results loaded from experiment.yaml.
log_info(f"Best Score: {results.best_score}")
2022-06-15 02:01:37 - INFO     | Best Score: 0.7741159200200212
In [15]:
# Log the parameter set associated with the best score.
log_info(f"Best Params: {results.best_params}")
2022-06-15 02:01:37 - INFO     | Best Params: {'model': 'RandomForestClassifier()', 'max_features': 0.031837350792579364, 'max_depth': 99, 'n_estimators': 1235, 'min_samples_split': 16, 'min_samples_leaf': 4, 'max_samples': 0.9248344222191298, 'criterion': 'entropy', 'imputer': "SimpleImputer(strategy='median')", 'scaler': 'None', 'pca': "PCA('mle')", 'encoder': 'CustomOrdinalEncoder()'}
In [16]:
# Best model from each model-type.
df = results.to_formatted_dataframe(return_style=False, include_rank=True)
# Rank trials within each model type by mean ROC AUC, highest first;
# method="first" breaks ties by row order so exactly one row per model gets rank 1.
within_model_rank = df.groupby("model")["roc_auc Mean"].rank(method="first", ascending=False)
df["model_rank"] = within_model_rank
# Keep only the top-ranked trial of every model type.
df.loc[df["model_rank"] == 1]
Out[16]:
rank roc_auc Mean roc_auc 95CI.LO roc_auc 95CI.HI model C max_features max_depth n_estimators min_samples_split min_samples_leaf max_samples criterion learning_rate min_child_weight subsample colsample_bytree colsample_bylevel reg_alpha reg_lambda imputer scaler pca encoder model_rank
11 1 0.77 0.74 0.81 RandomForestClassifier() NaN 0.03 99.00 1235.00 16.00 4.00 0.92 entropy NaN NaN NaN NaN NaN NaN NaN SimpleImputer(strategy='median') None PCA('mle') CustomOrdinalEncoder() 1.00
2 3 0.76 0.71 0.81 LogisticRegression() 0.00 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN SimpleImputer(strategy='median') MinMaxScaler() PCA('mle') OneHotEncoder() 1.00
7 6 0.75 0.71 0.80 ExtraTreesClassifier() NaN 0.14 40.00 1489.00 41.00 27.00 0.94 entropy NaN NaN NaN NaN NaN NaN NaN SimpleImputer(strategy='median') None None OneHotEncoder() 1.00
18 10 0.75 0.70 0.79 XGBClassifier() NaN NaN 5.00 1157.00 NaN NaN NaN NaN 0.02 3.00 0.69 0.50 0.73 0.03 2.91 SimpleImputer(strategy='median') None PCA('mle') CustomOrdinalEncoder() 1.00
In [17]:
# Styled table of all trials with their rank, showing up to 500 rows.
results.to_formatted_dataframe(return_style=True,
                               include_rank=True,
                               num_rows=500)
Out[17]:
rank roc_auc Mean roc_auc 95CI.LO roc_auc 95CI.HI model C max_features max_depth n_estimators min_samples_split min_samples_leaf max_samples criterion learning_rate min_child_weight subsample colsample_bytree colsample_bylevel reg_alpha reg_lambda imputer scaler pca encoder
1 0.774 0.742 0.806 RandomForestClassifier() <NA> 0.032 99.000 1,235.000 16.000 4.000 0.925 entropy <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer(strategy='median') None PCA('mle') CustomOrdinalEncoder()
2 0.761 0.702 0.820 RandomForestClassifier() <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer() None None OneHotEncoder()
3 0.759 0.711 0.807 LogisticRegression() 0.000 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer(strategy='median') MinMaxScaler() PCA('mle') OneHotEncoder()
4 0.759 0.712 0.806 LogisticRegression() 0.000 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer(strategy='most_frequent') MinMaxScaler() None OneHotEncoder()
5 0.757 0.711 0.803 LogisticRegression() <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer() StandardScaler() None OneHotEncoder()
6 0.753 0.707 0.800 ExtraTreesClassifier() <NA> 0.137 40.000 1,489.000 41.000 27.000 0.944 entropy <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer(strategy='median') None None OneHotEncoder()
7 0.751 0.702 0.800 ExtraTreesClassifier() <NA> 0.784 4.000 1,324.000 47.000 4.000 0.864 entropy <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer() None PCA('mle') OneHotEncoder()
8 0.749 0.724 0.774 ExtraTreesClassifier() <NA> 0.563 71.000 1,725.000 49.000 16.000 0.956 gini <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer(strategy='median') None None CustomOrdinalEncoder()
9 0.748 0.702 0.793 LogisticRegression() 95.634 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer(strategy='median') MinMaxScaler() None OneHotEncoder()
10 0.747 0.704 0.789 XGBClassifier() <NA> <NA> 5.000 1,157.000 <NA> <NA> <NA> <NA> 0.018 3.000 0.694 0.501 0.726 0.033 2.910 SimpleImputer(strategy='median') None PCA('mle') CustomOrdinalEncoder()
11 0.746 0.698 0.794 ExtraTreesClassifier() <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer() None None OneHotEncoder()
12 0.746 0.700 0.791 RandomForestClassifier() <NA> 0.869 98.000 1,537.000 15.000 4.000 0.622 gini <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer(strategy='median') None None CustomOrdinalEncoder()
13 0.745 0.703 0.787 RandomForestClassifier() <NA> 0.757 44.000 745.000 33.000 6.000 0.608 gini <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer() None None CustomOrdinalEncoder()
14 0.742 0.691 0.793 XGBClassifier() <NA> <NA> 2.000 1,671.000 <NA> <NA> <NA> <NA> 0.021 2.000 0.657 0.591 0.780 0.026 3.081 SimpleImputer(strategy='median') None None OneHotEncoder()
15 0.740 0.718 0.761 XGBClassifier() <NA> <NA> 9.000 551.000 <NA> <NA> <NA> <NA> 0.044 20.000 0.675 0.837 0.550 0.018 3.292 SimpleImputer(strategy='median') None None CustomOrdinalEncoder()
16 0.738 0.727 0.748 ExtraTreesClassifier() <NA> 0.861 52.000 1,995.000 33.000 19.000 0.651 gini <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer(strategy='most_frequent') None None CustomOrdinalEncoder()
17 0.737 0.716 0.757 RandomForestClassifier() <NA> 0.528 70.000 1,003.000 37.000 19.000 0.530 gini <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer(strategy='median') None PCA('mle') CustomOrdinalEncoder()
18 0.723 0.705 0.741 LogisticRegression() 0.000 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer(strategy='median') StandardScaler() None CustomOrdinalEncoder()
19 0.714 0.648 0.779 XGBClassifier() <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer() None None OneHotEncoder()
20 0.712 0.704 0.719 XGBClassifier() <NA> <NA> 16.000 1,642.000 <NA> <NA> <NA> <NA> 0.071 27.000 0.899 0.908 0.592 0.001 1.009 SimpleImputer(strategy='median') None PCA('mle') CustomOrdinalEncoder()
In [18]:
# Trials restricted to the RandomForestClassifier model, with rank.
results.to_formatted_dataframe(query='model == "RandomForestClassifier()"', include_rank=True)
Out[18]:
rank roc_auc Mean roc_auc 95CI.LO roc_auc 95CI.HI max_features max_depth n_estimators min_samples_split min_samples_leaf max_samples criterion imputer pca encoder
1 0.774 0.742 0.806 0.032 99.000 1,235.000 16.000 4.000 0.925 entropy SimpleImputer(strategy='median') PCA('mle') CustomOrdinalEncoder()
2 0.761 0.702 0.820 <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer() None OneHotEncoder()
3 0.746 0.700 0.791 0.869 98.000 1,537.000 15.000 4.000 0.622 gini SimpleImputer(strategy='median') None CustomOrdinalEncoder()
4 0.745 0.703 0.787 0.757 44.000 745.000 33.000 6.000 0.608 gini SimpleImputer() None CustomOrdinalEncoder()
5 0.737 0.716 0.757 0.528 70.000 1,003.000 37.000 19.000 0.530 gini SimpleImputer(strategy='median') PCA('mle') CustomOrdinalEncoder()
In [19]:
# Trials restricted to the LogisticRegression model, with rank.
results.to_formatted_dataframe(query='model == "LogisticRegression()"', include_rank=True)
Out[19]:
rank roc_auc Mean roc_auc 95CI.LO roc_auc 95CI.HI C imputer scaler pca encoder
1 0.759 0.711 0.807 0.000 SimpleImputer(strategy='median') MinMaxScaler() PCA('mle') OneHotEncoder()
2 0.759 0.712 0.806 0.000 SimpleImputer(strategy='most_frequent') MinMaxScaler() None OneHotEncoder()
3 0.757 0.711 0.803 <NA> SimpleImputer() StandardScaler() None OneHotEncoder()
4 0.748 0.702 0.793 95.634 SimpleImputer(strategy='median') MinMaxScaler() None OneHotEncoder()
5 0.723 0.705 0.741 0.000 SimpleImputer(strategy='median') StandardScaler() None CustomOrdinalEncoder()

BayesSearchCV Performance Over Time

In [20]:
# Performance across search trials, one facet per model type.
results.plot_performance_across_trials(facet_by='model').show()
In [21]:
# Performance across search trials for the RandomForestClassifier trials only.
results.plot_performance_across_trials(query='model == "RandomForestClassifier()"').show()

Variable Performance Over Time

In [22]:
# Parameter values tried across search trials for the RandomForestClassifier trials only.
results.plot_parameter_values_across_trials(query='model == "RandomForestClassifier()"').show()

Scatter Matrix

In [23]:
# results.plot_scatter_matrix(query='model == "RandomForestClassifier()"',
#                             height=1000, width=1000).show()

Variable Performance - Numeric

In [24]:
# Performance against the numeric parameters of the RandomForestClassifier trials.
# The figure is shown as the cell's last expression (no explicit .show() here, unlike neighbouring cells).
results.plot_performance_numeric_params(query='model == "RandomForestClassifier()"',
                                        height=800)
/usr/local/lib/python3.9/site-packages/statsmodels/nonparametric/smoothers_lowess.py:227: RuntimeWarning:

invalid value encountered in true_divide

In [25]:
# Parallel-coordinates view of the RandomForestClassifier trials.
results.plot_parallel_coordinates(query='model == "RandomForestClassifier()"').show()

Variable Performance - Non-Numeric

In [26]:
# Performance against the non-numeric parameters of the RandomForestClassifier trials.
results.plot_performance_non_numeric_params(query='model == "RandomForestClassifier()"').show()

In [27]:
# Score versus max_features for the RandomForestClassifier trials,
# with max_depth passed as the size argument and encoder as the color argument.
results.plot_score_vs_parameter(
    query='model == "RandomForestClassifier()"',
    parameter='max_features',
    size='max_depth',
    color='encoder',
)

In [28]:
# results.plot_parameter_vs_parameter(
#     query='model == "XGBClassifier()"',
#     parameter_x='colsample_bytree',
#     parameter_y='learning_rate',
#     size='max_depth'
# )
In [29]:
# results.plot_parameter_vs_parameter(
#     query='model == "XGBClassifier()"',
#     parameter_x='colsample_bytree',
#     parameter_y='learning_rate',
#     size='imputer'
# )

Best Model - Test Set Performance

In [30]:
# Score the held-out test set with the best estimator from the latest run and preview
# the first ten values (the output below shows continuous scores, not class labels).
test_predictions = best_estimator.predict(X_test)
test_predictions[:10]
Out[30]:
array([0.34417795, 0.36143173, 0.46583925, 0.33332345, 0.21120017,
       0.27141023, 0.17140721, 0.40142613, 0.17645289, 0.22338202])
In [31]:
# Build a two-class evaluator from the test labels and the scores in `test_predictions`.
# NOTE(review): 0.37 is a hard-coded score threshold; its origin is not shown in this notebook.
evaluator = hlp.sklearn_eval.TwoClassEvaluator(
    actual_values=y_test,
    predicted_scores=test_predictions,
    score_threshold=0.37
)
In [32]:
# Histogram of actual vs. predicted values for the current `evaluator`.
evaluator.plot_actual_vs_predict_histogram()
In [33]:
# Confusion matrix for the current `evaluator`.
evaluator.plot_confusion_matrix()
In [34]:
# Styled table of all metrics, rounded to 3 decimals, with 'prior' and 'constant'
# dummy-classifier columns alongside for comparison.
evaluator.all_metrics_df(return_style=True,
                         dummy_classifier_strategy=['prior', 'constant'],
                         round_by=3)
Out[34]:
  Score Dummy (prior) Dummy (constant) Explanation
AUC 0.783 0.500 0.500 Area under the ROC curve (true pos. rate vs false pos. rate); ranges from 0.5 (purely random classifier) to 1.0 (perfect classifier)
True Positive Rate 0.525 0.000 1.000 52.5% of positive instances were correctly identified.; i.e. 31 "Positive Class" labels were correctly identified out of 59 instances; a.k.a Sensitivity/Recall
True Negative Rate 0.851 1.000 0.000 85.1% of negative instances were correctly identified.; i.e. 120 "Negative Class" labels were correctly identified out of 141 instances
False Positive Rate 0.149 0.000 1.000 14.9% of negative instances were incorrectly identified as positive; i.e. 21 "Negative Class" labels were incorrectly identified as "Positive Class", out of 141 instances
False Negative Rate 0.475 1.000 0.000 47.5% of positive instances were incorrectly identified as negative; i.e. 28 "Positive Class" labels were incorrectly identified as "Negative Class", out of 59 instances
Positive Predictive Value 0.596 0.000 0.295 When the model claims an instance is positive, it is correct 59.6% of the time; i.e. out of the 52 times the model predicted "Positive Class", it was correct 31 times; a.k.a precision
Negative Predictive Value 0.811 0.705 0.000 When the model claims an instance is negative, it is correct 81.1% of the time; i.e. out of the 148 times the model predicted "Negative Class", it was correct 120 times
F1 Score 0.559 0.000 0.456 The F1 score can be interpreted as a weighted average of the precision and recall, where an F1 score reaches its best value at 1 and worst score at 0.
Precision/Recall AUC 0.618 0.295 0.295 Precision/Recall AUC is calculated with `average_precision` which summarizes a precision-recall curve as the weighted mean of precisions achieved at each threshold. See sci-kit learn documentation for caveats.
Accuracy 0.755 0.705 0.295 75.5% of instances were correctly identified
Error Rate 0.245 0.295 0.705 24.5% of instances were incorrectly identified
% Positive 0.295 0.295 0.295 29.5% of the data are positive; i.e. out of 200 total observations; 59 are labeled as "Positive Class"
Total Observations 200 200 200 There are 200 total observations; i.e. sample size
In [35]:
# ROC curve for the current `evaluator`.
evaluator.plot_roc_auc_curve().show()
<Figure size 720x444.984 with 0 Axes>
In [36]:
# Precision/recall curve for the current `evaluator`.
evaluator.plot_precision_recall_auc_curve().show()
In [37]:
# Threshold curves over the score-threshold range 0.1 to 0.7.
evaluator.plot_threshold_curves(score_threshold_range=(0.1, 0.7)).show()
In [38]:
# Precision/recall trade-off over the score-threshold range 0.1 to 0.6.
evaluator.plot_precision_recall_tradeoff(score_threshold_range=(0.1, 0.6)).show()
In [39]:
# Styled gain/lift table by percentile for the current `evaluator`.
evaluator.calculate_lift_gain(return_style=True)
Out[39]:
  Gain Lift
Percentile    
5 0.14 2.71
10 0.22 2.20
15 0.32 2.15
20 0.44 2.20
25 0.51 2.03
30 0.58 1.92
35 0.64 1.84
40 0.71 1.78
45 0.76 1.69
50 0.83 1.66
55 0.85 1.54
60 0.86 1.44
65 0.86 1.33
70 0.88 1.26
75 0.92 1.22
80 0.97 1.21
85 0.97 1.14
90 1.00 1.11
95 1.00 1.05
100 1.00 1.00

Production Model - Test Set Performance

In [40]:
# Score the held-out test set with the registered Production model and preview the first ten values.
# NOTE: this rebinds `test_predictions`, replacing the best-model scores computed in the previous section.
test_predictions = production_model.predict(X_test)
test_predictions[:10]
Out[40]:
array([0.34417795, 0.36143173, 0.46583925, 0.33332345, 0.21120017,
       0.27141023, 0.17140721, 0.40142613, 0.17645289, 0.22338202])
In [41]:
# Build a two-class evaluator from the test labels and the scores in `test_predictions`.
# NOTE: this rebinds `evaluator`, replacing the evaluator built in the previous section.
# NOTE(review): 0.37 is a hard-coded score threshold; its origin is not shown in this notebook.
evaluator = hlp.sklearn_eval.TwoClassEvaluator(
    actual_values=y_test,
    predicted_scores=test_predictions,
    score_threshold=0.37
)
In [42]:
# Histogram of actual vs. predicted values for the current `evaluator`.
evaluator.plot_actual_vs_predict_histogram()
In [43]:
# Confusion matrix for the current `evaluator`.
evaluator.plot_confusion_matrix()
In [44]:
# Styled table of all metrics, rounded to 3 decimals, with 'prior' and 'constant'
# dummy-classifier columns alongside for comparison.
evaluator.all_metrics_df(return_style=True,
                         dummy_classifier_strategy=['prior', 'constant'],
                         round_by=3)
Out[44]:
  Score Dummy (prior) Dummy (constant) Explanation
AUC 0.783 0.500 0.500 Area under the ROC curve (true pos. rate vs false pos. rate); ranges from 0.5 (purely random classifier) to 1.0 (perfect classifier)
True Positive Rate 0.525 0.000 1.000 52.5% of positive instances were correctly identified.; i.e. 31 "Positive Class" labels were correctly identified out of 59 instances; a.k.a Sensitivity/Recall
True Negative Rate 0.851 1.000 0.000 85.1% of negative instances were correctly identified.; i.e. 120 "Negative Class" labels were correctly identified out of 141 instances
False Positive Rate 0.149 0.000 1.000 14.9% of negative instances were incorrectly identified as positive; i.e. 21 "Negative Class" labels were incorrectly identified as "Positive Class", out of 141 instances
False Negative Rate 0.475 1.000 0.000 47.5% of positive instances were incorrectly identified as negative; i.e. 28 "Positive Class" labels were incorrectly identified as "Negative Class", out of 59 instances
Positive Predictive Value 0.596 0.000 0.295 When the model claims an instance is positive, it is correct 59.6% of the time; i.e. out of the 52 times the model predicted "Positive Class", it was correct 31 times; a.k.a precision
Negative Predictive Value 0.811 0.705 0.000 When the model claims an instance is negative, it is correct 81.1% of the time; i.e. out of the 148 times the model predicted "Negative Class", it was correct 120 times
F1 Score 0.559 0.000 0.456 The F1 score can be interpreted as a weighted average of the precision and recall, where an F1 score reaches its best value at 1 and worst score at 0.
Precision/Recall AUC 0.618 0.295 0.295 Precision/Recall AUC is calculated with `average_precision` which summarizes a precision-recall curve as the weighted mean of precisions achieved at each threshold. See sci-kit learn documentation for caveats.
Accuracy 0.755 0.705 0.295 75.5% of instances were correctly identified
Error Rate 0.245 0.295 0.705 24.5% of instances were incorrectly identified
% Positive 0.295 0.295 0.295 29.5% of the data are positive; i.e. out of 200 total observations; 59 are labeled as "Positive Class"
Total Observations 200 200 200 There are 200 total observations; i.e. sample size
In [45]:
# ROC curve for the current `evaluator`.
evaluator.plot_roc_auc_curve().show()
<Figure size 720x444.984 with 0 Axes>
In [46]:
# Precision/recall curve for the current `evaluator`.
evaluator.plot_precision_recall_auc_curve().show()
In [47]:
# Threshold curves over the score-threshold range 0.1 to 0.7.
evaluator.plot_threshold_curves(score_threshold_range=(0.1, 0.7)).show()
In [48]:
# Precision/recall trade-off over the score-threshold range 0.1 to 0.6.
evaluator.plot_precision_recall_tradeoff(score_threshold_range=(0.1, 0.6)).show()
In [49]:
# Styled gain/lift table by percentile for the current `evaluator`.
evaluator.calculate_lift_gain(return_style=True)
Out[49]:
  Gain Lift
Percentile    
5 0.14 2.71
10 0.22 2.20
15 0.32 2.15
20 0.44 2.20
25 0.51 2.03
30 0.58 1.92
35 0.64 1.84
40 0.71 1.78
45 0.76 1.69
50 0.83 1.66
55 0.85 1.54
60 0.86 1.44
65 0.86 1.33
70 0.88 1.26
75 0.92 1.22
80 0.97 1.21
85 0.97 1.14
90 1.00 1.11
95 1.00 1.05
100 1.00 1.00